From fd6faa493baa986e43d011f7bc84c3fe3b811b2b Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Thu, 6 Aug 2015 01:28:10 -0400
Subject: [PATCH 3/9] Support Loongson2f floating-point instructions in
 mips/math-emu.

* (arch/mips/include/asm/inst.h): Add Loongson2f function field values
  for madd/msub/nmadd/nmsub that use the spec2 opcode, and the
  Loongson2f/MIPS-5 format field value for paired-single
  floating-point operations.

* (arch/mips/math-emu/cp1emu.c): Add support for the Loongson2f
  instructions for madd/msub/nmadd/nmsub, which use the spec2 opcode.
  Also add support for the Loongson2f instructions that use the
  paired-single floating-point format.
---
 arch/mips/include/uapi/asm/inst.h |   4 +-
 arch/mips/math-emu/cp1emu.c       | 281 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 283 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 9b6ccbd..99e6430 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -65,6 +65,8 @@ enum spec_op {
 enum spec2_op {
 	madd_op, maddu_op, mul_op, spec2_3_unused_op,
 	msub_op, msubu_op, /* more unused ops */
+	loongson_madd_op = 0x18, loongson_msub_op,
+	loongson_nmadd_op, loongson_nmsub_op,
 	clz_op = 0x20, clo_op,
 	dclz_op = 0x24, dclo_op,
 	sdbpp_op = 0x3f
@@ -151,7 +153,7 @@ enum cop0_com_func {
  */
 enum cop1_fmt {
 	s_fmt, d_fmt, e_fmt, q_fmt,
-	w_fmt, l_fmt
+	w_fmt, l_fmt, ps_fmt
 };
 
 /*
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index ed82f0e..86ec4f2 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -7,6 +7,9 @@
  * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
  * Copyright (C) 2000  MIPS Technologies, Inc.
  *
+ * Loongson instruction support
+ * Copyright (C) 2011  Mark H Weaver <mhw@netris.org>
+ *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -61,6 +64,11 @@ static int fpu_emu(struct pt_regs *, struct mips_fpu_struct *,
 static int fpux_emu(struct pt_regs *,
 	struct mips_fpu_struct *, mips_instruction, void *__user *);
 
+#ifdef CONFIG_MACH_LOONGSON
+static int loongson_spec2_emu(struct pt_regs *,
+	struct mips_fpu_struct *, mips_instruction, void *__user *);
+#endif
+
 /* Control registers */
 
 #define FPCREG_RID	0	/* $0  = revision id */
@@ -843,6 +851,14 @@ do {									\
 #define DPFROMREG(dp, x)	DIFROMREG((dp).bits, x)
 #define DPTOREG(dp, x)	DITOREG((dp).bits, x)
 
+/* Support for Loongson paired single floating-point format */
+#define PSIFROMREG(si1, si2, x) ({ u64 di; DIFROMREG(di, x);		\
+			(si1) = (u32)di; (si2) = (u32)(di >> 32); })
+#define PSITOREG(si1, si2, x) DITOREG((si1) | ((u64)(si2) << 32), x)
+
+#define PSPFROMREG(sp1, sp2, x) PSIFROMREG((sp1).bits, (sp2).bits, x)
+#define PSPTOREG(sp1, sp2, x)	PSITOREG((sp1).bits, (sp2).bits, x)
+
 /*
  * Emulate a CFC1 instruction.
  */
@@ -1367,6 +1383,16 @@ emul:
 			xcp->regs[MIPSInst_RD(ir)] =
 				xcp->regs[MIPSInst_RS(ir)];
 		break;
+
+#ifdef CONFIG_MACH_LOONGSON
+	case spec2_op:{
+		int sig = loongson_spec2_emu(xcp, ctx, ir, fault_addr);
+		if (sig)
+			return sig;
+		break;
+	}
+#endif
+
 	default:
 sigill:
 		return SIGILL;
@@ -1444,6 +1470,172 @@ DEF3OP(msub, dp, ieee754dp_mul, ieee754dp_sub, );
 DEF3OP(nmadd, dp, ieee754dp_mul, ieee754dp_add, ieee754dp_neg);
 DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg);
 
+#ifdef CONFIG_MACH_LOONGSON
+static int loongson_spec2_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
+	mips_instruction ir, void *__user *fault_addr)
+{
+	int rfmt;		/* resulting format */
+	unsigned rcsr = 0;	/* resulting csr */
+	union {
+		union ieee754dp d;
+		struct {
+			union ieee754sp s;
+			union ieee754sp s2;
+		};
+	} rv;			/* resulting value */
+
+	/* XXX maybe add a counter for loongson spec2 fp instructions? */
+	/* MIPS_FPU_EMU_INC_STATS(cp1xops); */
+
+	switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) {
+	case s_fmt:{
+		union ieee754sp(*handler) (union ieee754sp, union ieee754sp, union ieee754sp);
+		union ieee754sp fd, fs, ft;
+
+		switch (MIPSInst_FUNC(ir)) {
+		case loongson_madd_op:
+			handler = fpemu_sp_madd;
+			goto scoptop;
+		case loongson_msub_op:
+			handler = fpemu_sp_msub;
+			goto scoptop;
+		case loongson_nmadd_op:
+			handler = fpemu_sp_nmadd;
+			goto scoptop;
+		case loongson_nmsub_op:
+			handler = fpemu_sp_nmsub;
+			goto scoptop;
+
+		      scoptop:
+			SPFROMREG(fd, MIPSInst_FD(ir));
+			SPFROMREG(fs, MIPSInst_FS(ir));
+			SPFROMREG(ft, MIPSInst_FT(ir));
+			rv.s = (*handler) (fd, fs, ft);
+
+		      copcsr:
+			if (ieee754_cxtest(IEEE754_INEXACT))
+				rcsr |= FPU_CSR_INE_X | FPU_CSR_INE_S;
+			if (ieee754_cxtest(IEEE754_UNDERFLOW))
+				rcsr |= FPU_CSR_UDF_X | FPU_CSR_UDF_S;
+			if (ieee754_cxtest(IEEE754_OVERFLOW))
+				rcsr |= FPU_CSR_OVF_X | FPU_CSR_OVF_S;
+			if (ieee754_cxtest(IEEE754_INVALID_OPERATION))
+				rcsr |= FPU_CSR_INV_X | FPU_CSR_INV_S;
+
+			break;
+
+		default:
+			return SIGILL;
+		}
+		break;
+	}
+
+	case d_fmt:{
+		union ieee754dp(*handler) (union ieee754dp, union ieee754dp, union ieee754dp);
+		union ieee754dp fd, fs, ft;
+
+		switch (MIPSInst_FUNC(ir)) {
+		case loongson_madd_op:
+			handler = fpemu_dp_madd;
+			goto dcoptop;
+		case loongson_msub_op:
+			handler = fpemu_dp_msub;
+			goto dcoptop;
+		case loongson_nmadd_op:
+			handler = fpemu_dp_nmadd;
+			goto dcoptop;
+		case loongson_nmsub_op:
+			handler = fpemu_dp_nmsub;
+			goto dcoptop;
+
+		      dcoptop:
+			DPFROMREG(fd, MIPSInst_FD(ir));
+			DPFROMREG(fs, MIPSInst_FS(ir));
+			DPFROMREG(ft, MIPSInst_FT(ir));
+			rv.d = (*handler) (fd, fs, ft);
+			goto copcsr;
+
+		default:
+			return SIGILL;
+		}
+		break;
+	}
+
+	case ps_fmt:{
+		union ieee754sp(*handler) (union ieee754sp, union ieee754sp, union ieee754sp);
+		struct _ieee754_csr ieee754_csr_save;
+		union ieee754sp fd1, fs1, ft1;
+		union ieee754sp fd2, fs2, ft2;
+
+		switch (MIPSInst_FUNC(ir)) {
+		case loongson_madd_op:
+			handler = fpemu_sp_madd;
+			goto pscoptop;
+		case loongson_msub_op:
+			handler = fpemu_sp_msub;
+			goto pscoptop;
+		case loongson_nmadd_op:
+			handler = fpemu_sp_nmadd;
+			goto pscoptop;
+		case loongson_nmsub_op:
+			handler = fpemu_sp_nmsub;
+			goto pscoptop;
+
+		      pscoptop:
+			PSPFROMREG(fd1, fd2, MIPSInst_FD(ir));
+			PSPFROMREG(fs1, fs2, MIPSInst_FS(ir));
+			PSPFROMREG(ft1, ft2, MIPSInst_FT(ir));
+			rv.s = (*handler) (fd1, fs1, ft1);
+			ieee754_csr_save = ieee754_csr;
+			rv.s2 = (*handler) (fd2, fs2, ft2);
+			ieee754_csr.cx |= ieee754_csr_save.cx;
+			ieee754_csr.sx |= ieee754_csr_save.sx;
+			goto copcsr;
+
+		default:
+			return SIGILL;
+		}
+		break;
+	}
+
+	default:
+		return SIGILL;
+	}
+
+	/*
+	 * Update the fpu CSR register for this operation.
+	 * If an exception is required, generate a tidy SIGFPE exception,
+	 * without updating the result register.
+	 * Note: cause exception bits do not accumulate, they are rewritten
+	 * for each op; only the flag/sticky bits accumulate.
+	 */
+	ctx->fcr31 = (ctx->fcr31 & ~FPU_CSR_ALL_X) | rcsr;
+	if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) {
+		/*printk ("SIGFPE: fpu csr = %08x\n",ctx->fcr31); */
+		return SIGFPE;
+	}
+
+	/*
+	 * Now we can safely write the result back to the register file.
+	 */
+	switch (rfmt) {
+	case d_fmt:
+		DPTOREG(rv.d, MIPSInst_FD(ir));
+		break;
+	case s_fmt:
+		SPTOREG(rv.s, MIPSInst_FD(ir));
+		break;
+	case ps_fmt:
+		PSPTOREG(rv.s, rv.s2, MIPSInst_FD(ir));
+		break;
+	default:
+		return SIGILL;
+	}
+
+	return 0;
+}
+#endif
+
 static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 	mips_instruction ir, void *__user *fault_addr)
 {
@@ -1654,7 +1846,12 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 	unsigned cond;
 	union {
 		union ieee754dp d;
-		union ieee754sp s;
+		struct {
+			union ieee754sp s;
+#ifdef CONFIG_MACH_LOONGSON
+			union ieee754sp s2; /* for Loongson paired singles */
+#endif
+		};
 		int w;
 		s64 l;
 	} rv;			/* resulting value */
@@ -2065,6 +2262,83 @@ dcopuop:
 		break;
 	}
 
+#ifdef CONFIG_MACH_LOONGSON
+	case ps_fmt:{		/* 6 */
+		/* Support for Loongson paired single fp instructions */
+		union {
+			union ieee754sp(*b) (union ieee754sp, union ieee754sp);
+			union ieee754sp(*u) (union ieee754sp);
+		} handler;
+
+		switch (MIPSInst_FUNC(ir)) {
+			/* binary ops */
+		case fadd_op:
+			handler.b = ieee754sp_add;
+			goto pscopbop;
+		case fsub_op:
+			handler.b = ieee754sp_sub;
+			goto pscopbop;
+		case fmul_op:
+			handler.b = ieee754sp_mul;
+			goto pscopbop;
+
+			/* unary  ops */
+		case fabs_op:
+			handler.u = ieee754sp_abs;
+			goto pscopuop;
+		case fneg_op:
+			handler.u = ieee754sp_neg;
+			goto pscopuop;
+		case fmov_op:
+			/* an easy one */
+			PSPFROMREG(rv.s, rv.s2, MIPSInst_FS(ir));
+			break;
+
+		      pscopbop: /* paired binary op handler */
+			{
+				struct _ieee754_csr ieee754_csr_save;
+				union ieee754sp fs1, ft1;
+				union ieee754sp fs2, ft2;
+
+				PSPFROMREG(fs1, fs2, MIPSInst_FS(ir));
+				PSPFROMREG(ft1, ft2, MIPSInst_FT(ir));
+				rv.s  = (*handler.b) (fs1, ft1);
+				ieee754_csr_save = ieee754_csr;
+				rv.s2 = (*handler.b) (fs2, ft2);
+				ieee754_csr.cx |= ieee754_csr_save.cx;
+				ieee754_csr.sx |= ieee754_csr_save.sx;
+				goto copcsr;
+			}
+		      pscopuop: /* paired unary op handler */
+			{
+				struct _ieee754_csr ieee754_csr_save;
+				union ieee754sp fs1;
+				union ieee754sp fs2;
+
+				PSPFROMREG(fs1, fs2, MIPSInst_FS(ir));
+				rv.s  = (*handler.u) (fs1);
+				ieee754_csr_save = ieee754_csr;
+				rv.s2 = (*handler.u) (fs2);
+				ieee754_csr.cx |= ieee754_csr_save.cx;
+				ieee754_csr.sx |= ieee754_csr_save.sx;
+				goto copcsr;
+			}
+			break;
+
+		default:
+			if (MIPSInst_FUNC(ir) >= fcmp_op) {
+				/* Loongson fp hardware handles all
+				   cases of fp compare insns, so we
+				   shouldn't have to */
+				printk ("Loongson paired-single fp compare"
+					" unimplemented in cp1emu.c\n");
+			}
+			return SIGILL;
+		}
+		break;
+	}
+#endif
+
 	case l_fmt:
 
 		if (!cpu_has_mips_3_4_5_64_r2_r6)
@@ -2136,6 +2410,11 @@ dcopuop:
 
 		DITOREG(rv.l, MIPSInst_FD(ir));
 		break;
+#ifdef CONFIG_MACH_LOONGSON
+	case ps_fmt:
+		PSPTOREG(rv.s, rv.s2, MIPSInst_FD(ir));
+		break;
+#endif
 	default:
 		return SIGILL;
 	}
-- 
2.4.3

